#select variables
data_student<-data_student_raw %>% 
  filter(an_bac!='2021' & an_bac!='2020') %>%
  select(judet_bac,judet_adm,unitate_de_invatamant,liceu_repartizat,school_harmonized,
         an_adm,an_bac,specializare_adm,specializare_bac,specializare_bac2,specializare_lb,school_change,Cod_SIRUTA2_hs_adm,
         Cod_SIRUTA2_hs_bac,entrance_perc,grad_perc,n_hs_town,town_hs_bac,
         entrance_perc,grad_perc,media_la_admitere,rezultat,media,Cod_SIRUTA2_ms_adm,scoala_de_provenienta,town_hs_adm)

#add number of high schools (graduation)
data_student<- data_student %>% 
  filter(an_bac!='2021' & an_bac!='2020') %>%
  group_by(judet_bac,town_hs_bac,an_bac) %>% 
  mutate(n_hs_town=n_distinct(unitate_de_invatamant[!is.na(unitate_de_invatamant) & unitate_de_invatamant!='' & unitate_de_invatamant!='HS NOT MATCHED'])) 


#add nr hs per town groups
data_student <-data_student %>% mutate(n_hs_town_group=as.character(n_hs_town))
data_student <-data_student %>% mutate(n_hs_town_group=ifelse(n_hs_town>=4 & n_hs_town<=15,'4-15',n_hs_town_group))
data_student <-data_student %>% mutate(n_hs_town_group=ifelse(n_hs_town>=16,'16+',n_hs_town_group))
data_student$n_hs_town_group<-with(data_student, reorder(n_hs_town_group, n_hs_town))

#add number of high schools (admission)
data_student <- data_student %>% 
  filter(an_bac!='2021' & an_bac!='2020') %>%
  group_by(judet_adm,town_hs_adm,an_adm) %>% 
  mutate(n_hs_town_adm=n_distinct(liceu_repartizat[!is.na(specializare_adm)  & !is.na(liceu_repartizat) & liceu_repartizat!='']))
 
#add number of middle schools
data_student<-data_student %>% 
  group_by(Cod_SIRUTA2_ms_adm,an_adm) %>%
  mutate(n_ms_town=n_distinct(scoala_de_provenienta[!is.na(specializare_adm)  & !is.na(liceu_repartizat) & liceu_repartizat!='']))



data_student <-data_student %>% mutate(n_hs_town_group_adm=as.character(n_hs_town_adm))
data_student <-data_student %>% mutate(n_hs_town_group_adm=ifelse(n_hs_town_adm>=4 & n_hs_town_adm<=15,'4-15',n_hs_town_group_adm))
data_student <-data_student %>% mutate(n_hs_town_group_adm=ifelse(n_hs_town_adm>=16,'16+',n_hs_town_group_adm))
data_student$n_hs_town_group_adm<-with(data_student, reorder(n_hs_town_group_adm, n_hs_town_adm))

data_student$n_hs_town_adm


#PANE 1: stats/town and school
##1. Number of towns per category
n_towns<-data_student %>% 
  group_by(n_hs_town_group,an_bac) %>% 
  summarise(count=n_distinct(town_hs_bac)) %>%
  group_by(n_hs_town_group) %>% 
  summarise(`Towns (Yearly)`=sprintf("%.1f",mean(count)),sd=sprintf("(%.1f)",sd(count))) %>%
  filter(n_hs_town_group!=0)

#2. number of middle schools per town
n_ms<-data_student %>% 
  filter(!is.na(Cod_SIRUTA2_ms_adm)) %>%
  group_by(n_hs_town_group,an_bac) %>% 
  summarise(count=mean(n_ms_town,na.rm=T)) %>%
  group_by(n_hs_town_group) %>% 
  summarise(`Middle Schools`=sprintf("%.1f",mean(count)),sd=sprintf("(%.1f)",sd(count))) %>%
  filter(n_hs_town_group!=0)

#3. number of hs
n_hs<-data_student %>% 
  group_by(n_hs_town_group,an_bac) %>% 
  summarise(count=mean(n_hs_town,na.rm=T)) %>%
  group_by(n_hs_town_group) %>% 
  summarise(`High Schools`=sprintf("%.1f",mean(count)),sd=sprintf("(%.1f)",sd(count))) %>%
  filter(n_hs_town_group!=0)


# 4. Number of tracks per school
tracks_school<-data_student %>% 
  group_by(n_hs_town_group,town_hs_bac,an_bac,liceu_repartizat,specializare_adm,specializare_lb) %>% 
  summarise() %>%
  filter(!is.na(specializare_adm) & !is.na(specializare_lb) & !is.na(liceu_repartizat) & liceu_repartizat!='') %>%
  group_by(n_hs_town_group,town_hs_bac,an_bac,liceu_repartizat) %>%
  summarise(count=n()) %>%
  group_by(n_hs_town_group) %>%
  summarise(`Tracks (per School)`=sprintf("%.1f",mean(count)),sd=sprintf("(%.1f)",sd(count))) %>%
  filter(n_hs_town_group!=0)

#Pane 2:
#1. matched students. no switchers
n_students_matched <- data_student %>% 
  filter(school_change==F) %>%
  filter(!is.na(specializare_adm) & !is.na(liceu_repartizat) & liceu_repartizat!='') %>%
  filter(!is.na(specializare_bac) & !is.na(unitate_de_invatamant) & !is.na(rezultat) & !is.na(unitate_de_invatamant) & unitate_de_invatamant!='HS NOT MATCHED') %>%
  group_by(n_hs_town_group) %>% 
  summarise(`Matched Students (Non-Switchers)`=n()) 

#2. matched students. switchers
n_students_matched_switchers <- data_student %>% 
  filter(school_change==T) %>%
  filter(!is.na(specializare_adm) & !is.na(liceu_repartizat) & liceu_repartizat!='') %>%
  filter(!is.na(specializare_bac) & !is.na(unitate_de_invatamant) & !is.na(rezultat) & !is.na(unitate_de_invatamant) & unitate_de_invatamant!='HS NOT MATCHED') %>%
  group_by(n_hs_town_group) %>% 
  summarise(`Matched Students (Switchers)`=n()) 

#3. unmatched students (adm)
n_students_adm_unmatched<- data_student %>% 
  filter(!is.na(specializare_adm) & !is.na(liceu_repartizat) & liceu_repartizat!='') %>%
  filter(!is.na(specializare_bac) | is.na(unitate_de_invatamant) | is.na(rezultat) | is.na(unitate_de_invatamant) | unitate_de_invatamant!='HS NOT MATCHED') %>%
  group_by(n_hs_town_group) %>% 
  summarise(`Unmatched Students (admissions)`=n()) %>%
  filter(n_hs_town_group!=0)

#4. unmatched students (bac)
n_students_bac_unmatched<- data_student %>% 
  filter(is.na(specializare_adm) | is.na(liceu_repartizat) ) %>%
  filter(!is.na(specializare_bac) & !is.na(unitate_de_invatamant) & !is.na(rezultat) & !is.na(unitate_de_invatamant) & unitate_de_invatamant!='HS NOT MATCHED') %>%
  group_by(n_hs_town_group) %>% 
  summarise(`Unmatched Students (graduation)`=n()) %>%
  filter(n_hs_town_group!=0)

###############################
#Pane 3:
#Students per town
n_bac_students_town <- data_student %>% 
  filter(school_change==F) %>%
  filter(!is.na(specializare_adm) & !is.na(liceu_repartizat) & liceu_repartizat!='') %>%
  filter(!is.na(specializare_bac) & !is.na(unitate_de_invatamant) & !is.na(rezultat) & !is.na(unitate_de_invatamant) & unitate_de_invatamant!='HS NOT MATCHED') %>%
  group_by(n_hs_town_group,town_hs_bac,an_bac,judet_bac) %>% 
  summarise(count=n()) %>%
  group_by(n_hs_town_group) %>%
  summarise(`Yearly Exit Exam Students (per Town)`=format(sprintf("%.0f",mean(count))),sd=format(sprintf("(%.0f)",sd(count))))

#students per school
n_bac_students_school <- data_student %>% 
  filter(school_change==F) %>%
  filter(!is.na(specializare_adm) & !is.na(liceu_repartizat) & liceu_repartizat!='') %>%
  filter(!is.na(specializare_bac) & !is.na(unitate_de_invatamant) & !is.na(rezultat) & !is.na(unitate_de_invatamant) & unitate_de_invatamant!='HS NOT MATCHED') %>%
  group_by(n_hs_town_group,town_hs_bac,an_bac,judet_bac,unitate_de_invatamant) %>% 
  summarise(count=n()) %>%
  group_by(n_hs_town_group) %>%
  summarise(`Yearly Exit Exam Students (per School)`=format(sprintf("%.0f",mean(count))),sd=format(sprintf("(%.0f)",sd(count))))

#students per track
n_bac_students_track <- data_student %>% 
  filter(school_change==F) %>%
  filter(!is.na(specializare_adm) & !is.na(liceu_repartizat) & liceu_repartizat!='') %>%
  filter(!is.na(specializare_bac) & !is.na(unitate_de_invatamant) & !is.na(rezultat) & !is.na(unitate_de_invatamant) & unitate_de_invatamant!='HS NOT MATCHED') %>%
  group_by(n_hs_town_group,town_hs_bac,an_bac,judet_bac,unitate_de_invatamant,specializare_bac) %>% 
  summarise(count=n()) %>%
  group_by(n_hs_town_group) %>%
  summarise(`Yearly Exit Exam Students (per Track)`=format(sprintf("%.0f",mean(count))),sd=format(sprintf("(%.0f)",sd(count))))


#PANE 4: grades:
n_bac_grades <- data_student %>%
  filter(!is.na(specializare_adm) & !is.na(liceu_repartizat) & liceu_repartizat!='') %>%
  filter(!is.na(specializare_bac)  & !is.na(unitate_de_invatamant) & !is.na(rezultat) & !is.na(unitate_de_invatamant) & unitate_de_invatamant!='HS NOT MATCHED' ) %>%
  group_by(n_hs_town_group) %>%
  summarise(`Exit Exam Score (Percentile)`=sprintf("%.0f",mean(grad_perc,na.rm=T)*100),sd=sprintf("(%.0f)",sd(grad_perc,na.rm=T)*100))

n_adm_grades <- data_student %>%
  filter(!is.na(specializare_adm) & !is.na(liceu_repartizat) & liceu_repartizat!='') %>%
  filter(!is.na(specializare_bac)  & !is.na(unitate_de_invatamant) & !is.na(rezultat) & !is.na(unitate_de_invatamant) & unitate_de_invatamant!='HS NOT MATCHED' ) %>%
  group_by(n_hs_town_group) %>%
  summarise(`Admission Exam Score (Percentile)`=sprintf("%.0f",mean(entrance_perc,na.rm=T)*100),sd=sprintf("(%.0f)",sd(entrance_perc,na.rm=T)*100))

n_bac_outcome <- data_student %>% 
  filter(!is.na(specializare_adm) & !is.na(liceu_repartizat) & liceu_repartizat!='') %>%
  filter(!is.na(specializare_bac)  & !is.na(unitate_de_invatamant) & !is.na(rezultat) & !is.na(unitate_de_invatamant) & unitate_de_invatamant!='HS NOT MATCHED' ) %>%
  group_by(n_hs_town_group) %>%
  summarise(`Exit Exam Pass Rate (%)`=sprintf("%.0f",(1-sum(rezultat %in% c('Neprezentat','NEPREZENTAT','Respins','RESPINS','Eliminat din examen','ELIMINAT DIN EXAMEN'))/n())*100))



n_student<-cbind(n_towns,n_hs[,-1],n_ms[,-1],
                 tracks_school[,-1],
                 n_students_matched[,-1],n_students_matched_switchers[,-1],
                 n_bac_students_town[,-1],n_bac_students_school[,-1],n_bac_students_track[,-1],n_adm_grades[,-1],n_bac_grades[,-1],n_bac_outcome[,-1])








#teacher stats
data_teacher<-data_teacher_raw

data_teacher <- data_teacher %>% 
  group_by(County,town_hs_bac,Year) %>% 
  mutate(n_hs_town_teacher=n_distinct(school_harmonized)) 
data_teacher <-data_teacher %>% mutate(n_hs_town_group_teacher=as.character(n_hs_town_teacher))
data_teacher <-data_teacher %>% mutate(n_hs_town_group_teacher=ifelse(n_hs_town_teacher>=4 & n_hs_town_teacher<=15,'4-15',n_hs_town_group_teacher))
data_teacher <-data_teacher %>% mutate(n_hs_town_group_teacher=ifelse(n_hs_town_teacher>=16,'16+',n_hs_town_group_teacher))
data_teacher$n_hs_town_group_teacher<-with(data_teacher, reorder(n_hs_town_group_teacher, n_hs_town_teacher))

n_teacher_n<-data_teacher %>% 
  filter(!is.na(school_harmonized)) %>%
  group_by(n_hs_town_group_teacher) %>% 
  summarise(`Hired Teachers (Total)`=sprintf("%.0f",n()))

n_teacher_town <- data_teacher %>% 
  filter(!is.na(school_harmonized)) %>%
  group_by(n_hs_town_group_teacher,County,town_hs_bac,Year) %>% 
  summarise(count=n()) %>%
  group_by(n_hs_town_group_teacher) %>%
  summarise(`Yearly Hired Teachers (per Town)`=sprintf("%.1f",mean(count)),sd=sprintf("(%.1f)",sd(count)))


n_teacher_school <- data_teacher %>% 
  filter(!is.na(school_harmonized)) %>%
  group_by(n_hs_town_group_teacher,County,town_hs_bac,Year,school_harmonized) %>% 
  summarise(count=n()) %>%
  group_by(n_hs_town_group_teacher) %>%
  summarise(`Yearly Hired Teachers (per School)`=sprintf("%.1f",mean(count)),sd=sprintf("(%.1f)",sd(count)))

n_teacher_grades <- data_teacher %>%
  filter(!is.na(school_harmonized)) %>%
  group_by(n_hs_town_group_teacher) %>%
  summarise(`Teacher Score (Percentile)`=sprintf("%.2f",mean(teacher_perc)),sd=sprintf("(%.2f)",sd(teacher_perc)))

n_teacher<-cbind(n_teacher_n[,-1],n_teacher_town[,-1],n_teacher_school[,-1],n_teacher_grades[,-1])



#spending stats
#add number of hs
data_exp <- data_exp_raw %>% 
  group_by(judet.bac,town.hs,an) %>% 
  mutate(n_hs_town_exp=n_distinct(unitate_de_invatamant))
data_exp <-data_exp %>% mutate(n_hs_town_group_exp=as.character(n_hs_town_exp))
data_exp <-data_exp %>% mutate(n_hs_town_group_exp=ifelse(n_hs_town_exp>=4 & n_hs_town_exp<=15,'4-15',n_hs_town_group_exp))
data_exp <-data_exp %>% mutate(n_hs_town_group_exp=ifelse(n_hs_town_exp>=16,'16+',n_hs_town_group_exp))
data_exp$n_hs_town_group_exp<-with(data_exp, reorder(n_hs_town_group_exp, n_hs_town_exp))

n_exp_school_purchases <- data_exp %>% 
  filter(!is.na(unitate_de_invatamant)) %>%
  group_by(n_hs_town_group_exp,judet.bac,town.hs,an,unitate_de_invatamant) %>% 
  summarise(count=sum(ValoareEUR[Type=='Direct'],na.rm=T)) %>%
  group_by(n_hs_town_group_exp) %>%
  summarise(`Yearly Expenditures per School`=mean(count),sd=sd(count))

n_exp_town_purchases <- data_exp %>% 
  filter(!is.na(unitate_de_invatamant)) %>%
  group_by(n_hs_town_group_exp,judet.bac,town.hs,an) %>% 
  summarise(count=sum(ValoareEUR[Type=='Direct'],na.rm=T)) %>%
  group_by(n_hs_town_group_exp) %>%
  summarise(`Yearly Expenditures per Town`=mean(count),sd=sd(count))

n_exp_school_contract <- data_exp %>% 
  filter(!is.na(unitate_de_invatamant)) %>%
  group_by(n_hs_town_group_exp,judet.bac,town.hs,an,unitate_de_invatamant) %>% 
  summarise(count=sum(ValoareEUR[Type=='Contract'],na.rm=T)) %>%
  group_by(n_hs_town_group_exp) %>%
  summarise(`Yearly Expenditures per School`=mean(count),sd=sd(count))

n_exp_town_contract<- data_exp %>% 
  filter(!is.na(unitate_de_invatamant)) %>%
  group_by(n_hs_town_group_exp,judet.bac,town.hs,an) %>% 
  summarise(count=sum(ValoareEUR[Type=='Contract'],na.rm=T)) %>%
  group_by(n_hs_town_group_exp) %>%
  summarise(`Yearly Expenditures per Town`=mean(count),sd=sd(count))

n_exp_school <- data_exp %>% 
  filter(!is.na(unitate_de_invatamant)) %>%
  group_by(n_hs_town_group_exp,judet.bac,town.hs,an,unitate_de_invatamant) %>% 
  summarise(count=sum(ValoareEUR,na.rm=T)) %>%
  group_by(n_hs_town_group_exp) %>%
  summarise(`Yearly Expenditures (EUR 000s, per School)`=formatC(mean(count)/1000,format="f",big.mark=",",digits=0),sd=sprintf("(%.0f)",sd(count)/1000))

n_exp_town <- data_exp %>% 
  filter(!is.na(unitate_de_invatamant)) %>%
  group_by(n_hs_town_group_exp,judet.bac,town.hs,an) %>% 
  summarise(count=sum(ValoareEUR,na.rm=T)) %>%
  group_by(n_hs_town_group_exp) %>%
  summarise(`Yearly Expenditures (EUR 000s, per Town)`=formatC(mean(count)/1000,format="f",big.mark=",",digits=0),sd=sprintf("(%.0f)",sd(count)/1000))





#avg exp per student (per school)
adm_students_yr<-data_student %>% group_by(judet_bac,unitate_de_invatamant,an_adm) %>% summarise(n=n())
exp_yr<-data_exp %>% group_by(an,unitate_de_invatamant,judet.bac,n_hs_town_group_exp) %>% summarise(spent=sum(ValoareEUR,na.rm=T))

n_exp_per_student_town<-base::merge(exp_yr,
                                    adm_students_yr,
                                    by.x=c("unitate_de_invatamant","an"),
                                    by.y=c("unitate_de_invatamant","an_adm")) %>% 
    filter(n>10) %>%
    mutate(spent=spent/n) %>%
    group_by(n_hs_town_group_exp) %>%
    summarise(`Yearly Expenditures (per Student)`=  format(sprintf("%.0f",mean(spent))),sd=sprintf("(%.0f)",sd(spent)))


#
adm_students_yr<-data_student %>% 
  ungroup()%>% 
  filter(!is.na(unitate_de_invatamant) & unitate_de_invatamant!='HS NOT MATCHED' & unitate_de_invatamant!='' ) %>%
  group_by(judet_bac,unitate_de_invatamant,an_adm,town_hs_bac) %>% 
  summarise(n=n())



exp_yr_direct<-data_exp %>% 
  group_by(an,unitate_de_invatamant,judet.bac,n_hs_town_group_exp) %>% 
  filter(Type=='Direct') %>%
  summarise(spent=sum(ValoareEUR,na.rm=T))

n_exp_per_student_town_direct<-base::merge(exp_yr_direct,
                                    adm_students_yr,
                                    by.x=c("unitate_de_invatamant","an"),
                                    by.y=c("unitate_de_invatamant","an_adm")) %>% 
  group_by(n_hs_town_group_exp,town_hs_bac,an) %>%
  filter(n>10) %>%
  summarise(spent=sum(spent,na.rm=T),n=sum(n,na.rm=T),spent=spent/n) %>%
  arrange(-spent) %>%
  group_by(n_hs_town_group_exp) %>%
  summarise(`Yearly Expenditures per Student (Town)`=format(sum(spent*n)/sum(n)),
            sd=sprintf("(%.0f)",sd(spent)))

n_exp_per_student_school_direct<-base::merge(exp_yr_direct,
                                             adm_students_yr,
                                             by.x=c("unitate_de_invatamant","an"),
                                             by.y=c("unitate_de_invatamant","an_adm")) %>%
  group_by(n_hs_town_group_exp,town_hs_bac,an,unitate_de_invatamant) %>%
  filter(n>10) %>%
  summarise(spent=sum(spent,na.rm=T),n=sum(n,na.rm=T),spent=spent/n) %>%
  arrange(-spent) %>%
  group_by(n_hs_town_group_exp) %>%
  summarise(`Yearly Expenditures per Student (School)`=format(sum(spent*n)/sum(n)),
            sd=sprintf("(%.0f)",sd(spent)))

#avg exp per student 
adm_students_yr<-data_student %>% 
  ungroup() %>%
  filter(!is.na(unitate_de_invatamant) & unitate_de_invatamant!='HS NOT MATCHED' & unitate_de_invatamant!='' ) %>%
  group_by(judet_bac,unitate_de_invatamant,an_adm,town_hs_bac) %>% 
  summarise(n=n())
exp_yr<-data_exp %>% 
  group_by(an,unitate_de_invatamant,judet.bac,n_hs_town_group_exp) %>% 
  summarise(spent=sum(ValoareEUR,na.rm=T))

n_exp_per_student_town<-base::merge(exp_yr,
                                    adm_students_yr,
                                    by.x=c("unitate_de_invatamant","an"),
                                    by.y=c("unitate_de_invatamant","an_adm")) %>%
  group_by(n_hs_town_group_exp,town_hs_bac,an) %>%
  filter(n>10) %>%
  summarise(spent=sum(spent,na.rm=T),n=sum(n,na.rm=T),spent=spent/n) %>%
  arrange(-spent) %>%
  group_by(n_hs_town_group_exp) %>%
  summarise(`Yearly Expenditures per Student (Town)`=format(sum(spent*n)/sum(n)),
            sd=sprintf("(%.0f)",sd(spent)))



n_exp_per_student_school<-base::merge(exp_yr_direct,
                                    adm_students_yr,
                                    by.x=c("unitate_de_invatamant","an"),
                                    by.y=c("unitate_de_invatamant","an_adm")) %>%
  group_by(n_hs_town_group_exp,town_hs_bac,an,unitate_de_invatamant) %>%
  filter(n>10) %>%
  summarise(spent=sum(spent,na.rm=T),n=sum(n,na.rm=T),spent=spent/n) %>%
  arrange(-spent) %>%
  group_by(n_hs_town_group_exp) %>%
  summarise(`Yearly Expenditures per Student (School)`=format(sum(spent*n)/sum(n)),
            sd=sprintf("(%.0f)",sd(spent)))


n_exp<-cbind(n_exp_town[,-1],n_exp_school[,-1],
             n_exp_per_student_town[,-1],n_exp_per_student_school[,-1],
             n_exp_per_student_town_direct[,-1],n_exp_per_student_school_direct[,-1])
summary_stats<-cbind(n_student,n_teacher,n_exp)




summary_stats2<-as.data.frame(t(summary_stats[,-1]))
summary_stats2<-cbind(colnames(summary_stats)[-1],summary_stats2)
colnames(summary_stats2)<-c('Number of High Schools in Town',as.character(summary_stats[,1]))
summary_stats2[,1]<-colnames(summary_stats)[2:length(summary_stats)]
summary_stats2[,1][grepl("sd",summary_stats2[,1])]<-''

#write results
current_path<-rstudioapi::getActiveDocumentContext()$path
setwd(dirname(current_path))
summary_stats2 %>% kable(format='latex')
write.xlsx(summary_stats2,"summary_stats_v2.xlsx",overwrite=T)


